Load Libraries

Load Mothur OTU Data

#  Load in the taxonomy and shared data
otu_tax <- "mothur/stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.an.unique_list.0.03.cons.taxonomy"
otu_shared <- "mothur/stability.trim.contigs.good.unique.good.filter.unique.precluster.pick.pick.an.unique_list.shared"
otu_dat <- import_mothur(mothur_shared_file = otu_shared, mothur_constaxonomy_file = otu_tax)

# Fix the taxonomy names
colnames(tax_table(otu_data)) <- c("Kingdom","Phylum","Class","Order","Family","Genus","Species")
## Error in colnames(tax_table(otu_data)) <- c("Kingdom", "Phylum", "Class", : object 'otu_data' not found
################################################
########## ADD THE PROTEOBACTERIA TO THE PHYLA
phy <- data.frame(tax_table(otu_data))
## Error in tax_table(otu_data): object 'otu_data' not found
Phylum <- as.character(phy$Phylum)
## Error in eval(expr, envir, enclos): object 'phy' not found
Class <- as.character(phy$Class)
## Error in eval(expr, envir, enclos): object 'phy' not found
for  (i in 1:length(Phylum)){ 
  if (Phylum[i] == "Proteobacteria"){
    Phylum[i] <- Class[i]
  } 
}
## Error in eval(expr, envir, enclos): object 'Phylum' not found
phy$Phylum <- Phylum
## Error in eval(expr, envir, enclos): object 'Phylum' not found
t <- tax_table(as.matrix(phy))
## Error in as.matrix(phy): object 'phy' not found
tax_table(otu_data) <- t
## Error in tax_table(otu_data) <- t: object 'otu_data' not found
################################################

# Sample Names
samp_names <- colnames(otu_table(otu_data))
## Error in otu_table(otu_data): object 'otu_data' not found
# Create metadata info
df <- data.frame(matrix(NA, ncol = 1, nrow = length(samp_names)))
## Error in matrix(NA, ncol = 1, nrow = length(samp_names)): object 'samp_names' not found
colnames(df) <- c("names")
## Error in `colnames<-`(`*tmp*`, value = "names"): attempt to set 'colnames' on an object with less than two dimensions
df$names <- samp_names
## Error in eval(expr, envir, enclos): object 'samp_names' not found

Load in Metadata

# Create metadata info
meta_df <- make_muskegon_metadata(df)

# Create a norep_water_name column
meta_df$norep_water_name <- paste(substr(meta_df$names,1,4),substr(meta_df$names,7,9), sep = "")

# Load in the extra metadata for the Muskegon Lake Project
musk_data <- read.csv("metadata/processed_muskegon_metadata.csv", header = TRUE) # Load in the extra metada
musk_data_subset <- select(musk_data, -c(lakesite, limnion, month, year, project, season))
complete_meta_df <- left_join(meta_df, musk_data_subset, by = "norep_water_name")
## Warning in left_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector
row.names(complete_meta_df) <- complete_meta_df$names

complete_meta_df$water_name <- paste(substr(complete_meta_df$names,1,5),substr(complete_meta_df$names,7,9), sep = "")
complete_meta_df$norep_filter_name <- paste(substr(complete_meta_df$names,1,4),substr(complete_meta_df$names,6,9), sep = "")

Load in Oligotyping Data

##  Read in the Oligotyping Count file 
oligo_count <- read.table("oligotyping/MATRIX-COUNT.txt", header = TRUE, row.names = 1) # Read in the oligotyping file making the first row and columns the column and row names
colnames(oligo_count) <- gsub("X", "", colnames(oligo_count)) # Remove the X that R automatically puts in front of the node numbers in the column names
oligo_count <- oligo_count[ , order(names(oligo_count))] # Order the node numbers to match with taxonomy. 

##  Read in the Oligotyping Taxonomy File
raw_oligo_tax <- read.table("oligotyping/oligo.taxonomy") # Read in the oligotyping file making the first row and columns the column and row names
cols <- data.frame(str_split_fixed(raw_oligo_tax[,1], "size_", 2)) # Requires stringr package!
colnames(cols) <- c("Oligotype", "Size")
#cols$Size <- as.numeric(cols$Size)
cols$Oligotype <- gsub("\\|.*","",cols$Oligotype)
oligo_tax <- cbind(cols, raw_oligo_tax$V2)
colnames(oligo_tax)[3] <- "Taxonomy"
oligo_tax <- arrange(oligo_tax, Oligotype) # Order the node numbers to match with oligotype count table 
oligo_tax$Size = NULL
row.names(oligo_tax) <- oligo_tax$Oligotype
oligo_tax$Oligotype = NULL


# Separate the taxonomy
tax <- data.frame(str_split_fixed(oligo_tax$Taxonomy, ";", 7)) # Requires stringr package!
tax <- apply(tax, 2, function(y) gsub("\\s*\\([^\\)]+\\)","",as.character(y))) # remove parentheses with everything inside it
colnames(tax) <- c("Kingdom","Phylum","Class","Order","Family","Genus", "Species")
tax <- data.frame(tax)
tax$Species <- gsub(";","",tax$Species) # Remove the final ; in the species column
row.names(tax) <- row.names(oligo_tax) 


### Create a phyloseq object
oligoCOUNT <- otu_table(oligo_count, taxa_are_rows = FALSE)
oligoTAX <- tax_table(as.matrix(tax))
oligo_data <- phyloseq(oligoCOUNT, oligoTAX)

Add metadata to the phyloseq objects to Merge samples

# Add the metadata to our phyloseq object! 
##  Add metadata to OTUs
sample_data(otu_data) <- complete_meta_df
otu_data <- prune_taxa(taxa_sums(otu_data) > 0, otu_data)

##  Add metadata to Oligotypes
sample_data(oligo_data) <- complete_meta_df
oligo_data <- prune_taxa(taxa_sums(oligo_data) > 0, oligo_data)

# Merged metadata
merged_complete_meta_df<- 
  select(complete_meta_df, -c(names, replicate, nuc_acid_type, water_name)) %>% 
  distinct() %>%
  arrange(norep_filter_name)


## Add Production data from GVSU AWRI provided by the lab of Bopi Biddanda
bopi_data <- read.csv("metadata/production_data.csv") %>%
  dplyr::rename(norep_filter_name = names)  %>% # rename "names" column to "norep_filter_name"
  select(-c(X, limnion, fraction, month, year, season))

## Merge two metadata files together!
df1 <- full_join(merged_complete_meta_df, bopi_data) %>%
  select(-c(Depth, month)) 
## Joining, by = "norep_filter_name"
## Warning in full_join_impl(x, y, by$x, by$y, suffix$x, suffix$y): joining
## factor and character vector, coercing into character vector
# provide row.names to match sample
row.names(df1) <- df1$norep_filter_name

# merge samples for OTUs
otu_merged <- merge_samples(otu_data, group = "norep_filter_name", fun = "sum")
## Warning in asMethod(object): NAs introduced by coercion
## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion
sample_names(otu_merged) == row.names(merged_complete_meta_df) # Sanity check
##   [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [12] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [23] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [34] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [45] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [56] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [67] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [78] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
##  [89] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [100] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [111] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [122] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [133] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [144] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [155] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [166] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [177] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [188] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [199] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [210] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [221] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [232] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [243] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [254] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [265] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [276] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [287] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [298] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [309] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [320] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [331] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [342] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [353] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [364] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [375] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## [386] FALSE FALSE
# merge samples for Oligotypes
oligo_merged <- merge_samples(oligo_data, group = "norep_filter_name", fun = "sum")
## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion

## Warning in asMethod(object): NAs introduced by coercion
# Add nice sample information
sample_data(otu_merged) <- df1
sample_data(oligo_merged) <- df1

Subset Muskegon Samples

# Subset Mukegon Lake samples out of the OTU phyloseq object
otu_merged_musk <- subset_samples(physeq = otu_merged, project == "Muskegon_Lake")

# Subset Mukegon Lake samples out of the Oligotype phyloseq object
oligo_merged_musk <- subset_samples(physeq = oligo_merged, project == "Muskegon_Lake")

Sample Sequencing Read Counts

# Check the sequencing depth of each sample 
sums_otu <- data.frame(rowSums(otu_table(otu_merged_musk)))
colnames(sums_otu) <- "Sample_TotalSeqs"
sums_otu$sample <- row.names(sums_otu)
sums_otu <- arrange(sums_otu, Sample_TotalSeqs) %>%
  filter(Sample_TotalSeqs < 10000)

##  OTU: SUBSAMPLE AT 6600

####  Create a plot of the number of sequences per sample
ggplot(sums_otu, aes(x=reorder(sample, Sample_TotalSeqs), y = Sample_TotalSeqs)) + 
  ylab("Number of Sequences per Sample") +
  geom_bar(stat = "identity", colour="black",fill="cornflowerblue")  + xlab("Sample Name") + 
  ggtitle("OTU: Samples less than 10,000 reads") + 
  theme(axis.text.x = element_text(colour = "black", size=16, angle=45, hjust = 1, vjust = 1))

# Check the sequencing depth of each sample 
sums_oligo <- data.frame(rowSums(otu_table(oligo_merged_musk)))
colnames(sums_oligo) <- "Sample_TotalSeqs"
sums_oligo$sample <- row.names(sums_oligo)
sums_oligo <- arrange(sums_oligo, Sample_TotalSeqs) %>%
  filter(Sample_TotalSeqs < 5000)

sums_oligo$names <- sums_oligo$sample
o <- make_metadata_norep(sums_oligo)
o1 <- filter(o, fraction == "WholePart" & limnion == "Top") 

# OLIGOTYPING: SUBSAMPLE AT 4300

####  Create a plot of the number of sequences per sample
ggplot(sums_oligo, aes(x=reorder(sample, Sample_TotalSeqs), y = Sample_TotalSeqs)) + 
  ylab("Number of Sequences per Sample") +
  geom_bar(stat = "identity", colour="black",fill="cornflowerblue")  + xlab("Sample Name") + 
  ggtitle("Oligotyping") + 
  theme(axis.text.x = element_text(colour = "black", size=16, angle=45, hjust = 1, vjust = 1))

# Histogram of sample read counts for the read counts of each sample
ggplot(data.frame(sum = sample_sums(oligo_merged_musk)), aes(x = sum)) + 
  geom_histogram(color = "black", fill = "purple", binwidth = 1000) +
  ggtitle("Distribution of sample sequencing depth") + 
  xlab("Read counts") + 
  scale_x_continuous(expand = c(0,0)) + scale_y_continuous(expand = c(0,0)) + 
  theme(axis.title.y = element_blank())

Phenoflow Alpha diversity analysis

OTU Phenoflow analysis

# OTU analysis
# Create a new file called "otu_merged_musk_Physeq.RData" that has the phyloseq object
#save(list=c("otu_merged_musk"), file=paste0("Phenoflow/otu_merged_musk_Physeq.RData")) # Will be run on Flux
otu_phenoflow_alpha <- get(load("Phenoflow/otu_phenoflow/Phenoflow_diversity.RData"))

# Add the rownames so that it can be combined with the metadata
otu_phenoflow_alpha <- data.frame(otu_phenoflow_alpha, norep_filter_name = rownames(otu_phenoflow_alpha))
# Create the metadata information 
meta_dat <- data.frame(sample_data(otu_merged_musk))
# Merge phenoflow data and 
otu_phenoflow_alpha <- merge(otu_phenoflow_alpha, meta_dat, by = "norep_filter_name") %>%
  select(-sd.D0.bre, -D0.bre)



### PREPARE DATA FRAMES FOR PHENOFLOW ALPHA DIVERSITY ANALYSIS
free_otu_alpha <- filter(otu_phenoflow_alpha, fraction == "Free" & norep_filter_name != "MOTHJ715" & year == "2015")
part_otu_alpha <- filter(otu_phenoflow_alpha, fraction == "WholePart" & norep_filter_name != "MOTHJ715" & year == "2015")


# Can fractional production be predicted by phenoflow D0 observed richness? 
# FREE LIVING
free_otu_D0_stats <- lm(frac_bacprod ~ D0, data = free_otu_alpha)
free_otu_D0_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D0, data = free_otu_alpha)
## 
## Coefficients:
## (Intercept)           D0  
##    11.22987      0.01446
summary(free_otu_D0_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D0, data = free_otu_alpha)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.637 -10.917  -4.362   7.947  34.925 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.22987   12.55657   0.894    0.392
## D0           0.01446    0.01297   1.115    0.291
## 
## Residual standard error: 17.34 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.1106, Adjusted R-squared:  0.02164 
## F-statistic: 1.243 on 1 and 10 DF,  p-value: 0.2909
# PARTICLE
part_otu_D0_stats <- lm(frac_bacprod ~ D0, data = part_otu_alpha)
part_otu_D0_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D0, data = part_otu_alpha)
## 
## Coefficients:
## (Intercept)           D0  
##    -5.75443      0.01852
summary(part_otu_D0_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D0, data = part_otu_alpha)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.558 -2.111 -1.010  4.205  7.624 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -5.754434   3.816457  -1.508  0.16253   
## D0           0.018516   0.004149   4.463  0.00121 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.007 on 10 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.6658, Adjusted R-squared:  0.6323 
## F-statistic: 19.92 on 1 and 10 DF,  p-value: 0.00121
# Plot D0 Observed Richness
otu_pheno_D0 <- ggplot(otu_phenoflow_alpha, aes(x=D0, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D0 - sd.D0, xmax = D0 + sd.D0), width = 0.2) + 
  ggtitle("OTU: Phenoflow") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  scale_x_continuous(limits = c(0,2000)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Observed Richness (D0)") +
  #geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "Free"), method='lm') + 
  geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.15,0.9), legend.title=element_blank()) +
  annotate("text", x = 1000, y=35, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(free_otu_D0_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(free_otu_D0_stats)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 1600, y=8, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(part_otu_D0_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(part_otu_D0_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
# Can fractional production be predicted by phenoflow chao1? 
# FREE LIVING
free_otu_D0chao_stats <- lm(frac_bacprod ~ D0.chao, data = free_otu_alpha)
free_otu_D0chao_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = free_otu_alpha)
## 
## Coefficients:
## (Intercept)      D0.chao  
##   10.294438     0.008147
summary(free_otu_D0chao_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = free_otu_alpha)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -17.648 -12.410  -4.374   8.313  31.472 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 10.294438  10.885944   0.946    0.367
## D0.chao      0.008147   0.005765   1.413    0.188
## 
## Residual standard error: 16.79 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.1665, Adjusted R-squared:  0.08312 
## F-statistic: 1.997 on 1 and 10 DF,  p-value: 0.188
# PARTICLE
part_otu_D0chao_stats <- lm(frac_bacprod ~ D0.chao, data = part_otu_alpha)
part_otu_D0chao_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = part_otu_alpha)
## 
## Coefficients:
## (Intercept)      D0.chao  
##     -5.9672       0.0106
summary(part_otu_D0chao_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = part_otu_alpha)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.6688 -2.7353  0.8518  2.4842  5.4200 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -5.967203   3.069404  -1.944 0.080533 .  
## D0.chao      0.010596   0.001869   5.671 0.000207 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.218 on 10 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.7628, Adjusted R-squared:  0.7391 
## F-statistic: 32.16 on 1 and 10 DF,  p-value: 0.0002065
# Plot  chao1
otu_pheno_D0chao <- ggplot(otu_phenoflow_alpha, aes(x=D0.chao, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D0.chao - sd.D0.chao, xmax = D0.chao + sd.D0.chao), width = 0.2) + 
  ggtitle("OTU: Phenoflow") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  scale_x_continuous(limits = c(200,4000)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Chao1 (D0)") +
  #geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "Free"), method='lm') + 
  geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.15,0.9), legend.title=element_blank()) +
  annotate("text", x = 2000, y=35, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(free_otu_D0chao_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(free_otu_D0chao_stats)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 2700, y=8, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(part_otu_D0chao_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(part_otu_D0chao_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
# Can fractional production be predicted by phenoflow D1 SHANNON ENTROPY? 
# FREE LIVING
free_otu_D1_stats <- lm(frac_bacprod ~ D1, data = free_otu_alpha)
free_otu_D1_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D1, data = free_otu_alpha)
## 
## Coefficients:
## (Intercept)           D1  
##     11.1880       0.1906
summary(free_otu_D1_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D1, data = free_otu_alpha)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.768 -10.512  -2.281   7.633  34.265 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  11.1880    18.3486   0.610    0.556
## D1            0.1906     0.2605   0.732    0.481
## 
## Residual standard error: 17.91 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.05082,    Adjusted R-squared:  -0.04409 
## F-statistic: 0.5355 on 1 and 10 DF,  p-value: 0.4811
# PARTICLE
part_otu_D1_stats <- lm(frac_bacprod ~ D1, data = part_otu_alpha)
part_otu_D1_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D1, data = part_otu_alpha)
## 
## Coefficients:
## (Intercept)           D1  
##     1.03603      0.06921
summary(part_otu_D1_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D1, data = part_otu_alpha)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.068 -2.382 -1.078  3.249 10.867 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  1.03603    2.81221   0.368  0.72025   
## D1           0.06921    0.01792   3.862  0.00315 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.487 on 10 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.5986, Adjusted R-squared:  0.5585 
## F-statistic: 14.91 on 1 and 10 DF,  p-value: 0.003151
# Plot D1 SHANNON ENTROPY
otu_pheno_D1 <- ggplot(otu_phenoflow_alpha, aes(x=D1, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D1 - sd.D1, xmax = D1 + sd.D1), width = 0.2) + 
  ggtitle("OTU: Phenoflow") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  scale_x_continuous(limits = c(0,400), expand = c(0,0)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Shannon Entropy (D1)") +
  #geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "Free"), method='lm') + 
  geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.85,0.9), legend.title=element_blank()) +
  annotate("text", x = 175, y=35, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(free_otu_D1_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(free_otu_D1_stats)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 275, y=7, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(part_otu_D1_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(part_otu_D1_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
# Can fractional production be predicted by phenoflow D2 INVERSE SIMPSON? 
# FREE LIVING
free_otu_D2_stats <- lm(frac_bacprod ~ D2, data = free_otu_alpha)
free_otu_D2_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D2, data = free_otu_alpha)
## 
## Coefficients:
## (Intercept)           D2  
##     11.4212       0.4847
summary(free_otu_D2_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D2, data = free_otu_alpha)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.602 -10.034  -1.905   6.828  35.512 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  11.4212    21.8934   0.522    0.613
## D2            0.4847     0.8148   0.595    0.565
## 
## Residual standard error: 18.07 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.03418,    Adjusted R-squared:  -0.0624 
## F-statistic: 0.3539 on 1 and 10 DF,  p-value: 0.5651
# PARTICLE
part_otu_D2_stats <- lm(frac_bacprod ~ D2, data = part_otu_alpha)
part_otu_D2_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D2, data = part_otu_alpha)
## 
## Coefficients:
## (Intercept)           D2  
##     0.02044      0.26298
summary(part_otu_D2_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D2, data = part_otu_alpha)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.2971 -2.1906 -0.2354  1.3639  7.6078 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.02044    2.33018   0.009 0.993173    
## D2           0.26298    0.05084   5.173 0.000417 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.517 on 10 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.7279, Adjusted R-squared:  0.7007 
## F-statistic: 26.76 on 1 and 10 DF,  p-value: 0.0004174
# Plot D2 INVERSE SIMPSON
otu_pheno_D2 <- ggplot(otu_phenoflow_alpha, aes(x=D2, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D2 - sd.D2, xmax = D2 + sd.D2), width = 0.2) + 
  ggtitle("OTU: Phenoflow") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  scale_x_continuous(limits = c(0,80), expand = c(0,0)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Inverse Simpson (D2)") +
  #geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "Free"), method='lm') + 
  geom_smooth(data=subset(otu_phenoflow_alpha, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.85,0.9), legend.title=element_blank()) +
  annotate("text", x = 40, y=45, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(free_otu_D2_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(free_otu_D2_stats)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 50, y=25, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(part_otu_D2_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(part_otu_D2_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
otu_phenoflow <- plot_grid(otu_pheno_D0, otu_pheno_D0chao, otu_pheno_D1, otu_pheno_D2,
          labels = c("A", "B", "C", "D"), 
          align = "h", nrow = 2, ncol = 2)
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 139 rows containing missing values (geom_point).
## Warning: Removed 116 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 139 rows containing missing values (geom_point).
## Warning: Removed 116 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 139 rows containing missing values (geom_point).
## Warning: Removed 115 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 14 rows containing non-finite values (stat_smooth).
## Warning: Removed 141 rows containing missing values (geom_point).
## Warning: Removed 117 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
otu_phenoflow

#ggsave("Figures/Phenoflow_otu.png", otu_phenoflow, dpi = 600, units = "in", width = 10, height = 8)

OLIGOTYPING Phenoflow Analysis

# OTU analysis
# Create a new file called "otu_merged_musk_Physeq.RData" that has the phyloseq object
#save(list=c("otu_merged_musk"), file=paste0("Phenoflow/otu_merged_musk_Physeq.RData")) # Will be run on Flux

#Phenoflow::Diversity_16S(otu_merged_musk, R=3)

# Oligotyping
#oligo_phenoflow <- Diversity_16S(oligo_merged_musk, R=100, brea = FALSE)
#write.table(dat, "PhenoFlow/oligo.phenoflow", sep = "\t")

# Do not overwrite the important dataframe!
oligo_phenoflow_alpha <- read.table("PhenoFlow/oligo.phenoflow", sep = "\t", header = TRUE)
# Add the rownames so that it can be combined with the metadata
oligo_phenoflow_alpha <- data.frame(oligo_phenoflow_alpha, norep_filter_name = rownames(oligo_phenoflow_alpha))
# Create the metadata information 
meta_dat <- data.frame(sample_data(oligo_merged_musk))
# Merge phenoflow data and 
oligo_phenoflow_alpha <- merge(oligo_phenoflow_alpha, meta_dat, by = "norep_filter_name") %>%
  select(-sd.D0.bre, -D0.bre)



### PREPARE DATA FRAMES FOR PHENOFLOW ALPHA DIVERSITY ANALYSIS
free_oligo_alpha <- filter(oligo_phenoflow_alpha, fraction == "Free" & norep_filter_name != "MOTHJ715" & year == "2015")
part_oligo_alpha <- filter(oligo_phenoflow_alpha, fraction == "WholePart" & norep_filter_name != "MOTHJ715" & year == "2015")


# Can fractional production be predicted by phenoflow D0 observed richness? 
# FREE LIVING
free_oligo_D0_stats <- lm(frac_bacprod ~ D0, data = free_oligo_alpha)
free_oligo_D0_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D0, data = free_oligo_alpha)
## 
## Coefficients:
## (Intercept)           D0  
##    13.29673      0.03747
summary(free_oligo_D0_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D0, data = free_oligo_alpha)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.127 -13.686  -1.857   6.963  39.476 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.29673   63.77924   0.208    0.839
## D0           0.03747    0.22105   0.170    0.869
## 
## Residual standard error: 18.36 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.002865,   Adjusted R-squared:  -0.09685 
## F-statistic: 0.02873 on 1 and 10 DF,  p-value: 0.8688
# PARTICLE
part_oligo_D0_stats <- lm(frac_bacprod ~ D0, data = part_oligo_alpha)
part_oligo_D0_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D0, data = part_oligo_alpha)
## 
## Coefficients:
## (Intercept)           D0  
##    -35.8327       0.1747
summary(part_oligo_D0_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D0, data = part_oligo_alpha)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.2762  -4.1880  -0.8822   5.8425  11.2597 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -35.83267   19.11629  -1.874   0.0903 .
## D0            0.17471    0.07246   2.411   0.0366 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.887 on 10 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.3676, Adjusted R-squared:  0.3044 
## F-statistic: 5.814 on 1 and 10 DF,  p-value: 0.03661
# Plot D0 Observed Richness
oligo_pheno_D0 <- ggplot(oligo_phenoflow_alpha, aes(x=D0, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D0 - sd.D0, xmax = D0 + sd.D0), width = 0.2) + 
  ggtitle("Oligotyping: Phenoflow") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  scale_x_continuous(limits = c(200,400)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Observed Richness (D0)") +
  #geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "Free"), method='lm') + 
  geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.15,0.9), legend.title=element_blank()) +
  annotate("text", x = 370, y=25, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(free_oligo_D0_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(free_oligo_D0_stats)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 370, y=5, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(part_oligo_D0_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(part_oligo_D0_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
# Can fractional production be predicted by phenoflow chao1? 
# FREE LIVING
free_oligo_D0chao_stats <- lm(frac_bacprod ~ D0.chao, data = free_oligo_alpha)
free_oligo_D0chao_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = free_oligo_alpha)
## 
## Coefficients:
## (Intercept)      D0.chao  
##    -5.84212      0.09034
summary(free_oligo_D0chao_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = free_oligo_alpha)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.816 -13.372  -1.008   7.773  38.028 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -5.84212   72.99061  -0.080    0.938
## D0.chao      0.09034    0.21987   0.411    0.690
## 
## Residual standard error: 18.23 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.0166, Adjusted R-squared:  -0.08174 
## F-statistic: 0.1688 on 1 and 10 DF,  p-value: 0.6898
# PARTICLE
part_oligo_D0chao_stats <- lm(frac_bacprod ~ D0.chao, data = part_oligo_alpha)
part_oligo_D0chao_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = part_oligo_alpha)
## 
## Coefficients:
## (Intercept)      D0.chao  
##    -27.1424       0.1179
summary(part_oligo_D0chao_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D0.chao, data = part_oligo_alpha)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.494 -5.694 -1.356  4.844 16.549 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -27.14237   25.92181  -1.047    0.320
## D0.chao       0.11786    0.08191   1.439    0.181
## 
## Residual standard error: 7.883 on 10 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.1715, Adjusted R-squared:  0.08866 
## F-statistic:  2.07 on 1 and 10 DF,  p-value: 0.1808
# Plot  chao1
oligo_pheno_D0chao <- ggplot(oligo_phenoflow_alpha, aes(x=D0.chao, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D0.chao - sd.D0.chao, xmax = D0.chao + sd.D0.chao), width = 0.2) + 
  ggtitle("Oligotyping: Phenoflow") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  scale_x_continuous(limits = c(200,400)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Chao1 (D0)") +
  #geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "Free"), method='lm') + 
  #geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.15,0.9), legend.title=element_blank()) +
  annotate("text", x = 220, y=25, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(free_oligo_D0chao_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(free_oligo_D0chao_stats)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 220, y=5, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(part_oligo_D0chao_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(part_oligo_D0chao_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
# Can fractional production be predicted by phenoflow D1 SHANNON ENTROPY? 
# FREE LIVING
free_oligo_D1_stats <- lm(frac_bacprod ~ D1, data = free_oligo_alpha)
free_oligo_D1_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D1, data = free_oligo_alpha)
## 
## Coefficients:
## (Intercept)           D1  
##     -29.372        0.758
summary(free_oligo_D1_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D1, data = free_oligo_alpha)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -24.285  -5.322  -1.406   5.520  26.706 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -29.3722    28.9643  -1.014   0.3344  
## D1            0.7580     0.4057   1.869   0.0912 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.83 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.2588, Adjusted R-squared:  0.1847 
## F-statistic: 3.491 on 1 and 10 DF,  p-value: 0.09123
# PARTICLE
part_oligo_D1_stats <- lm(frac_bacprod ~ D1, data = part_oligo_alpha)
part_oligo_D1_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D1, data = part_oligo_alpha)
## 
## Coefficients:
## (Intercept)           D1  
##     -5.1569       0.2134
summary(part_oligo_D1_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D1, data = part_oligo_alpha)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -10.341  -1.393  -0.722   1.517   8.461 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -5.15685    3.86546  -1.334  0.21176   
## D1           0.21342    0.05018   4.253  0.00168 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.168 on 10 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.644,  Adjusted R-squared:  0.6084 
## F-statistic: 18.09 on 1 and 10 DF,  p-value: 0.001681
# Plot D1 SHANNON ENTROPY
oligo_pheno_D1 <- ggplot(oligo_phenoflow_alpha, aes(x=D1, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D1 - sd.D1, xmax = D1 + sd.D1), width = 0.2) + 
  ggtitle("Oligotyping: Phenoflow") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  scale_x_continuous(limits = c(0,150), expand = c(0,0)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Shannon Entropy (D1)") +
  geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "Free"), method='lm') + 
  geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.15,0.9), legend.title=element_blank()) +
  annotate("text", x = 110, y=45, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(free_oligo_D1_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(free_oligo_D1_stats)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 120, y=5, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(part_oligo_D1_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(part_oligo_D1_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
# Can fractional production be predicted by phenoflow D2 INVERSE SIMPSON? 
# FREE LIVING
free_oligo_D2_stats <- lm(frac_bacprod ~ D2, data = free_oligo_alpha)
free_oligo_D2_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D2, data = free_oligo_alpha)
## 
## Coefficients:
## (Intercept)           D2  
##     -7.4410       0.9395
summary(free_oligo_D2_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D2, data = free_oligo_alpha)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.900  -7.696  -1.478   3.820  30.038 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  -7.4410    15.5092  -0.480   0.6417  
## D2            0.9395     0.4433   2.119   0.0601 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.28 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.3099, Adjusted R-squared:  0.2409 
## F-statistic: 4.491 on 1 and 10 DF,  p-value: 0.0601
# PARTICLE
part_oligo_D2_stats <- lm(frac_bacprod ~ D2, data = part_oligo_alpha)
part_oligo_D2_stats
## 
## Call:
## lm(formula = frac_bacprod ~ D2, data = part_oligo_alpha)
## 
## Coefficients:
## (Intercept)           D2  
##     -2.1723       0.3782
summary(part_oligo_D2_stats)
## 
## Call:
## lm(formula = frac_bacprod ~ D2, data = part_oligo_alpha)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.7855 -2.4461 -0.7825  2.0559  7.7425 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.17231    2.77374  -0.783  0.45168    
## D2           0.37821    0.07549   5.010  0.00053 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.623 on 10 degrees of freedom
##   (11 observations deleted due to missingness)
## Multiple R-squared:  0.7151, Adjusted R-squared:  0.6866 
## F-statistic:  25.1 on 1 and 10 DF,  p-value: 0.0005296
# Plot D2 INVERSE SIMPSON
oligo_pheno_D2 <- ggplot(oligo_phenoflow_alpha, aes(x=D2, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + geom_errorbarh(aes(xmin = D2 - sd.D2, xmax = D2 + sd.D2), width = 0.2) + 
  ggtitle("Oligotyping: Phenoflow") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  scale_x_continuous(limits = c(0,80), expand = c(0,0)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Inverse Simpson (D2)") +
  geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "Free"), method='lm') + 
  geom_smooth(data=subset(oligo_phenoflow_alpha, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.15,0.9), legend.title=element_blank()) +
  annotate("text", x = 65, y=45, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(free_oligo_D2_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(free_oligo_D2_stats)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 50, y=5, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(part_oligo_D2_stats)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(part_oligo_D2_stats)$coefficients[,4][2]), digits = 4)))
## Warning: Ignoring unknown parameters: width
oligotyping_phenoflow <- plot_grid(oligo_pheno_D0, oligo_pheno_D0chao, oligo_pheno_D1, oligo_pheno_D2,
          labels = c("A", "B", "C", "D"), 
          align = "h", nrow = 2, ncol = 2)
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 139 rows containing missing values (geom_point).
## Warning: Removed 115 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 139 rows containing missing values (geom_point).
## Warning: Removed 116 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 34 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 139 rows containing missing values (geom_point).
## Warning: Removed 115 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 34 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 139 rows containing missing values (geom_point).
## Warning: Removed 115 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
oligotyping_phenoflow

#ggsave("Figures/Phenoflow_oligotyping.png", oligotyping_phenoflow, dpi = 600, units = "in", width = 10, height = 8)

Remove samples with too few reads

##  OTU: SUBSAMPLE AT 6600
# OLIGOTYPING: SUBSAMPLE AT 4300

# Prune samples out that have too few reads
otu_merged_pruned <- prune_samples(sample_sums(otu_merged) > 6600, otu_merged)
otu_merged_pruned <- prune_taxa(taxa_sums(otu_merged_pruned) > 0, otu_merged_pruned)
min(sample_sums(otu_merged_pruned))
## [1] 6665
# Prune samples out that have too few reads
oligo_merged_pruned <- prune_samples(sample_sums(oligo_merged) > 4300, oligo_merged)
oligo_merged_pruned <- prune_taxa(taxa_sums(oligo_merged_pruned) > 0, oligo_merged_pruned)
min(sample_sums(oligo_merged_pruned))
## [1] 4332
# Scale the samples
scaled_otu_merged_pruned <- otu_merged_pruned %>%
  scale_reads(round = "round") 
scaled_oligo_merged_pruned <- oligo_merged_pruned %>%
  scale_reads(round = "round") 

Alpha Diversity: Analysis ran on December 16th, 2016

# Initialize matrices to store richness and evenness estimates
 # nsamp <- nsamples(oligo_merged_pruned)
 # min_lib <- min(sample_sums(oligo_merged_pruned)) - 1
 # 
 # oligo_richness <- matrix(nrow = nsamp, ncol = 100)
 # row.names(oligo_richness) <- sample_names(oligo_merged_pruned)
 # 
 # oligo_evenness <- matrix(nrow = nsamp, ncol = 100)
 # row.names(oligo_evenness) <- sample_names(oligo_merged_pruned)
 # 
 # oligo_shannon <- matrix(nrow = nsamp, ncol = 100)
 # row.names(oligo_shannon) <- sample_names(oligo_merged_pruned)
 # 
 # # It is always important to set a seed when you subsample so your result is replicable
 # set.seed(777)
 # 
 # for (i in 1:100) {
 #  # Subsample
 #  r <- rarefy_even_depth(oligo_merged_pruned, sample.size = min_lib, verbose = FALSE, replace = TRUE)
 # 
 #  # Calculate richness
 #  rich <- as.numeric(as.matrix(estimate_richness(r, measures = "Observed")))
 #  oligo_richness[ ,i] <- rich
 # 
 #  # Calculate evenness
 #  even <- as.numeric(as.matrix(estimate_richness(r, measures = "InvSimpson")))
 #  oligo_evenness[ ,i] <- even
 # 
 #  # Calculate Shannon Entropy
 #  shannon <- as.numeric(as.matrix(estimate_richness(r, measures = "Shannon")))
 #  oligo_shannon[ ,i] <- shannon
 # 
 # }

 # write.table(oligo_evenness, "metadata/oligo_evenness100_rarefy4331", row.names = TRUE)
 # write.table(oligo_richness, "metadata/oligo_richness100_rarefy4331", row.names = TRUE)
 # write.table(oligo_shannon, "metadata/oligo_shannon100_rarefy4331", row.names = TRUE)
 # 
 # 
#  
# # Initialize matrices to store richness and evenness estimates
 # nsamp <- nsamples(otu_merged_pruned)
 # min_lib <- min(sample_sums(otu_merged_pruned)) - 1
 # 
 # otu_richness <- matrix(nrow = nsamp, ncol = 100)
 # row.names(otu_richness) <- sample_names(otu_merged_pruned)
 # 
 # otu_evenness <- matrix(nrow = nsamp, ncol = 100)
 # row.names(otu_evenness) <- sample_names(otu_merged_pruned)
 # 
 # otu_shannon <- matrix(nrow = nsamp, ncol = 100)
 # row.names(otu_shannon) <- sample_names(otu_merged_pruned)
 # 
 # # It is always important to set a seed when you subsample so your result is replicable
 # set.seed(777)
 # 
 # for (i in 1:100) {
 #  # Subsample
 #  r <- rarefy_even_depth(otu_merged_pruned, sample.size = min_lib, verbose = FALSE, replace = TRUE)
 # 
 #  # Calculate richness
 #  rich <- as.numeric(as.matrix(estimate_richness(r, measures = "Observed")))
 #  otu_richness[ ,i] <- rich
 # 
 #  # Calculate evenness
 #  even <- as.numeric(as.matrix(estimate_richness(r, measures = "InvSimpson")))
 #  otu_evenness[ ,i] <- even
 # 
 #  # Calculate evenness
 #  shannon <- as.numeric(as.matrix(estimate_richness(r, measures = "Shannon")))
 #  otu_shannon[ ,i] <- shannon
 # 
 # }

#  write.table(otu_evenness, "metadata/otu_evenness100_rarefy6664", row.names = TRUE)
#  write.table(otu_richness, "metadata/otu_richness100_rarefy6664", row.names = TRUE)
#  write.table(otu_shannon, "metadata/otu_shannon100_rarefy6664", row.names = TRUE)

Vegan Alpha Diversity Analysis

OTU Analysis

# Load values
nsamp <- nsamples(otu_merged_pruned)
min_lib <- min(sample_sums(otu_merged_pruned)) - 1

# Read in the files 
otu_richness <- read.table("metadata/otu_richness100_rarefy6664",  header = TRUE)
otu_evenness <- read.table("metadata/otu_evenness100_rarefy6664", header = TRUE)
otu_shannon <- read.table("metadata/otu_shannon100_rarefy6664", header = TRUE)

# Create a new dataframe to hold the means and standard deviations of richness estimates
norep_filter_name <- row.names(otu_richness)
mean <- apply(otu_richness, 1, mean)
sd <- apply(otu_richness, 1, sd)
measure <- rep("Richness", nsamp)
otu_rich_stats <- data.frame(norep_filter_name, mean, sd, measure)

# Create a new dataframe to hold the means and standard deviations of evenness estimates
norep_filter_name <- row.names(otu_evenness)
mean <- apply(otu_evenness, 1, mean)
sd <- apply(otu_evenness, 1, sd)
measure <- rep("Inverse_Simpson", nsamp)
otu_even_stats <- data.frame(norep_filter_name, mean, sd, measure)

# Create a new dataframe to hold the means and standard deviations of shannon entropy estimates
norep_filter_name <- row.names(otu_shannon)
mean <- apply(otu_shannon, 1, mean)
sd <- apply(otu_shannon, 1, sd)
measure <- rep("Shannon_Entropy", nsamp)
otu_shan_stats <- data.frame(norep_filter_name, mean, sd, measure)

# Create a new dataframe to hold the means and standard deviations of simpsons evenness estimates
norep_filter_name <- row.names(otu_evenness)
mean <- apply(otu_evenness, 1, mean)
sd <- apply(otu_evenness, 1, sd)
measure <- rep("Inverse_Simpson", nsamp)
otu_simpseven_stats <- data.frame(norep_filter_name, mean, sd, measure)

# Calculate Simpson's Evenness into new df called "simps_evenness"
otu_simps_evenness <- inner_join(otu_rich_stats, otu_even_stats, by = "norep_filter_name") %>%
  mutate(mean = mean.y/mean.x,
         sd = sd(mean),
         measure = "Simpsons_Evenness") %>%
  select(norep_filter_name, mean, sd, measure)

# Combine alpha diversity into one dataframe 
otu_alpha <- rbind(otu_rich_stats, otu_even_stats, otu_simps_evenness, otu_shan_stats)
s <- data.frame(sample_data(otu_merged_pruned))
otu_alphadiv <- merge(otu_alpha, s, by = "norep_filter_name") %>%
  filter(project == "Muskegon_Lake")

ggplot(filter(otu_alphadiv, measure == "Richness"), aes(x = norep_filter_name, y = mean, color = lakesite)) +
  geom_point() + facet_grid(project~fraction, scales = "free") + 
  xlab("Sample Name") + ylab("Richness") +
  theme(axis.text.x = element_text(angle = 30))  #Set the x-axis labels)

ggplot(filter(otu_alphadiv, measure == "Inverse_Simpson"), aes(x = norep_filter_name, y = mean, color = lakesite)) +
  geom_point() + facet_grid(project~fraction, scales = "free") + 
  xlab("Sample Name") + ylab("Inverse Simpson") +
  theme(axis.text.x = element_text(angle = 30))  #Set the x-axis labels)

ggplot(filter(otu_alphadiv, measure == "Shannon_Entropy"), aes(x = norep_filter_name, y = mean, color = lakesite)) +
  geom_point() + facet_grid(project~fraction, scales = "free") + 
  xlab("Sample Name") + ylab("Shannon Entropy") +
  theme(axis.text.x = element_text(angle = 30))  #Set the x-axis labels)

###############################
ML_otu_rich_stats <- filter(otu_alphadiv, measure == "Richness" & project == "Muskegon_Lake" & fraction %in% c("WholePart", "Free") & year == "2015")

# Can fractional production be predicted by richness? 
free_ML_otu_rich_stats <- filter(ML_otu_rich_stats, fraction == "Free")
freeprod_ML_otu_rich <- lm(frac_bacprod ~ mean, data = free_ML_otu_rich_stats)
freeprod_ML_otu_rich
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_rich_stats)
## 
## Coefficients:
## (Intercept)         mean  
##     0.51402      0.05311
summary(freeprod_ML_otu_rich)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_rich_stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -19.937 -11.778  -1.399  10.299  27.480 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.51402   15.94204   0.032    0.975
## mean         0.05311    0.03430   1.548    0.153
## 
## Residual standard error: 16.51 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.1934, Adjusted R-squared:  0.1127 
## F-statistic: 2.398 on 1 and 10 DF,  p-value: 0.1526
part_ML_abs_rich_stats <- filter(ML_otu_rich_stats, fraction == "WholePart")
partprod_MLotu_rich <- lm(frac_bacprod ~ mean, data = part_ML_abs_rich_stats)
partprod_MLotu_rich
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_rich_stats)
## 
## Coefficients:
## (Intercept)         mean  
##    -8.75434      0.02831
summary(partprod_MLotu_rich)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_rich_stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.1168 -4.1580 -0.2669  3.6917  8.8609 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)   
## (Intercept) -8.754341   4.704766  -1.861   0.0924 . 
## mean         0.028315   0.006728   4.209   0.0018 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.203 on 10 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.6391, Adjusted R-squared:  0.6031 
## F-statistic: 17.71 on 1 and 10 DF,  p-value: 0.001804
# Plot 
otu_rich_vegan <- ggplot(ML_otu_rich_stats, aes(x=mean, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), width = 0.2) + 
  ggtitle("OTU: Vegan") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  scale_x_continuous(expand = c(0,0), limits = c(0,1250)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Observed Richness (D0)") +
  #geom_smooth(data=subset(ML_otu_rich_stats, fraction == "Free"), method='lm') + 
  geom_smooth(data=subset(ML_otu_rich_stats, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.15,0.9),        
        legend.title=element_blank()) +
  annotate("text", x = 800, y=35, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(freeprod_ML_otu_rich)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(freeprod_ML_otu_rich)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 900, y=5, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(partprod_MLotu_rich)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(partprod_MLotu_rich)$coefficients[,4][2]), digits = 4)));
## Warning: Ignoring unknown parameters: width
######################################################### SHANNON ENTROPY
#  Subset a dataframe with the key information
ML_otu_shannon_stats <- filter(otu_alphadiv, 
                                   measure == "Shannon_Entropy" & 
                                     project == "Muskegon_Lake" & 
                                     fraction %in% c("WholePart", "Free") & 
                                     year == "2015")

# Can fractional production be predicted by simpsevenness? 
free_ML_otu_shannon_stats <- filter(ML_otu_shannon_stats, fraction == "Free")
freeprod_ML_otu_shannon <- lm(frac_bacprod ~ mean, data = free_ML_otu_shannon_stats)
freeprod_ML_otu_shannon
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_shannon_stats)
## 
## Coefficients:
## (Intercept)         mean  
##      -23.40        11.54
summary(freeprod_ML_otu_shannon)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_shannon_stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.745 -10.533  -1.786   7.452  35.270 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -23.40      74.45  -0.314    0.760
## mean           11.54      18.05   0.639    0.537
## 
## Residual standard error: 18.02 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.03925,    Adjusted R-squared:  -0.05682 
## F-statistic: 0.4086 on 1 and 10 DF,  p-value: 0.5371
part_ML_abs_shannon_stats <- filter(ML_otu_shannon_stats, fraction == "WholePart")
partprod_MLotu_shannon <- lm(frac_bacprod ~ mean, data = part_ML_abs_shannon_stats)
partprod_MLotu_shannon
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_shannon_stats)
## 
## Coefficients:
## (Intercept)         mean  
##      -37.39        10.23
summary(partprod_MLotu_shannon)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_shannon_stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.0141 -3.3200 -0.0846  1.3747 11.7928 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -37.388     12.998  -2.876  0.01649 * 
## mean          10.226      2.782   3.675  0.00428 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5.649 on 10 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.5746, Adjusted R-squared:  0.5321 
## F-statistic: 13.51 on 1 and 10 DF,  p-value: 0.004278
# Plot 
otu_shannon_vegan <- ggplot(ML_otu_shannon_stats, aes(x=mean, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), width = 0.2) + 
  ggtitle("OTU: Vegan") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  scale_x_continuous(expand = c(0,0), limits=c(3, 6)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Shannon Entropy (D1)") +
  #geom_smooth(data=subset(ML_otu_shannon_stats, fraction == "Free"), method='lm') + 
  geom_smooth(data=subset(ML_otu_shannon_stats, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.15,0.9),        
        legend.title=element_blank()) +
  annotate("text", x = 3.5, y=35, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(freeprod_ML_otu_shannon)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(freeprod_ML_otu_shannon)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 5.35, y=5, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(partprod_MLotu_shannon)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(partprod_MLotu_shannon)$coefficients[,4][2]), digits = 4))); 
## Warning: Ignoring unknown parameters: width
######################################################### INVERSE SIMPSON
#  Subset a dataframe with the key information
ML_otu_invsimps_stats <- filter(otu_alphadiv, 
                                   measure == "Inverse_Simpson" & 
                                     project == "Muskegon_Lake" & 
                                     fraction %in% c("WholePart", "Free") & 
                                     year == "2015")

# Can fractional production be predicted by invsimpsness? 
free_ML_otu_invsimps_stats <- filter(ML_otu_invsimps_stats, fraction == "Free")
freeprod_ML_otu_invsimps <- lm(frac_bacprod ~ mean, data = free_ML_otu_invsimps_stats)
freeprod_ML_otu_invsimps
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_invsimps_stats)
## 
## Coefficients:
## (Intercept)         mean  
##     11.2033       0.4942
summary(freeprod_ML_otu_invsimps)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_invsimps_stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.623  -9.985  -1.943   6.863  35.467 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  11.2033    21.9431   0.511    0.621
## mean          0.4942     0.8187   0.604    0.560
## 
## Residual standard error: 18.06 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.03516,    Adjusted R-squared:  -0.06132 
## F-statistic: 0.3644 on 1 and 10 DF,  p-value: 0.5595
part_ML_abs_invsimps_stats <- filter(ML_otu_invsimps_stats, fraction == "WholePart")
partprod_MLotu_invsimps <- lm(frac_bacprod ~ mean, data = part_ML_abs_invsimps_stats)
partprod_MLotu_invsimps
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_invsimps_stats)
## 
## Coefficients:
## (Intercept)         mean  
##     0.01836      0.26372
summary(partprod_MLotu_invsimps)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_invsimps_stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.4341 -2.1971 -0.2443  1.3800  7.6631 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  0.01836    2.35150   0.008 0.993925    
## mean         0.26372    0.05148   5.122 0.000449 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.55 on 10 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.724,  Adjusted R-squared:  0.6965 
## F-statistic: 26.24 on 1 and 10 DF,  p-value: 0.0004492
# Plot Simpson's Evenness
otu_invsimps_vegan <- ggplot(ML_otu_invsimps_stats, aes(x=mean, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), width = 0.2) + 
  ggtitle("OTU: Vegan") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  scale_x_continuous(limits = c(0,100), expand = c(0,0)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Inverse Simpson (D2)") +
  #geom_smooth(data=subset(ML_otu_invsimps_stats, fraction == "Free"), method='lm') + 
  geom_smooth(data=subset(ML_otu_invsimps_stats, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.85,0.9),        
        legend.title=element_blank()) +
  annotate("text", x = 58, y=35, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(freeprod_ML_otu_invsimps)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(freeprod_ML_otu_invsimps)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 63, y=5, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(partprod_MLotu_invsimps)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(partprod_MLotu_invsimps)$coefficients[,4][2]), digits = 4))); 
## Warning: Ignoring unknown parameters: width
######################################################### SIMPSON'S EVENNESS
#  Subset a dataframe with the key information
ML_otu_simpseven_stats <- filter(otu_alphadiv, 
                                   measure == "Simpsons_Evenness" & 
                                     project == "Muskegon_Lake" & 
                                     fraction %in% c("WholePart", "Free") & 
                                     year == "2015")

# Can fractional production be predicted by simpsevenness? 
free_ML_otu_simpseven_stats <- filter(ML_otu_simpseven_stats, fraction == "Free")
freeprod_ML_otu_simpseven <- lm(frac_bacprod ~ mean, data = free_ML_otu_simpseven_stats)
freeprod_ML_otu_simpseven
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_simpseven_stats)
## 
## Coefficients:
## (Intercept)         mean  
##       53.98      -494.38
summary(freeprod_ML_otu_simpseven)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_otu_simpseven_stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.813 -12.187  -5.198   9.118  34.886 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)    53.98      26.68   2.023   0.0706 .
## mean         -494.38     433.17  -1.141   0.2803  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.3 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.1152, Adjusted R-squared:  0.02677 
## F-statistic: 1.303 on 1 and 10 DF,  p-value: 0.2803
part_ML_abs_simpseven_stats <- filter(ML_otu_simpseven_stats, fraction == "WholePart")
partprod_MLotu_simpseven <- lm(frac_bacprod ~ mean, data = part_ML_abs_simpseven_stats)
partprod_MLotu_simpseven
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_simpseven_stats)
## 
## Coefficients:
## (Intercept)         mean  
##      -4.393      276.542
summary(partprod_MLotu_simpseven)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_simpseven_stats)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -11.1679  -1.8119  -0.9444   0.7755  12.6815 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   -4.393      4.774  -0.920  0.37911   
## mean         276.542     85.310   3.242  0.00885 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.048 on 10 degrees of freedom
##   (10 observations deleted due to missingness)
## Multiple R-squared:  0.5124, Adjusted R-squared:  0.4636 
## F-statistic: 10.51 on 1 and 10 DF,  p-value: 0.008845
# Plot 
otu_simpseven_vegan <- ggplot(ML_otu_simpseven_stats, aes(x=mean, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) +
  ggtitle("OTU: Vegan") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  scale_x_continuous(expand = c(0,0), limits=c(0, 0.09)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Simpson's Evenness") +
  #geom_smooth(data=subset(ML_otu_simpseven_stats, fraction == "Free"), method='lm') + 
  geom_smooth(data=subset(ML_otu_simpseven_stats, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.15,0.9),        
        legend.title=element_blank()) +
  annotate("text", x = 0.03, y=35, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(freeprod_ML_otu_simpseven)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(freeprod_ML_otu_simpseven)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 0.015, y=7, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(partprod_MLotu_simpseven)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(partprod_MLotu_simpseven)$coefficients[,4][2]), digits = 4))); 

otu_vegan <- plot_grid(otu_rich_vegan, otu_simpseven_vegan, otu_shannon_vegan, otu_invsimps_vegan,
          labels = c("A", "B", "C", "D"), 
          align = "h", nrow = 2, ncol = 2)
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 22 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 22 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 10 rows containing non-finite values (stat_smooth).
## Warning: Removed 22 rows containing missing values (geom_point).
## Warning: Removed 22 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
otu_vegan

#ggsave("Figures/vegan_otu_alpha_vs_prod.png", otu_vegan, dpi = 600, units = "in", width = 10, height = 8)

Oligotyping Analysis

# Load values
nsamp <- nsamples(oligo_merged_pruned)
min_lib <- min(sample_sums(oligo_merged_pruned)) - 1

# Read in the files 
oligo_richness <- read.table("metadata/oligo_richness100_rarefy4331",  header = TRUE)
oligo_evenness <- read.table("metadata/oligo_evenness100_rarefy4331", header = TRUE)
oligo_shannon <- read.table("metadata/oligo_shannon100_rarefy4331", header = TRUE)


# Create a new dataframe to hold the means and standard deviations of richness estimates
norep_filter_name <- row.names(oligo_richness)
mean <- apply(oligo_richness, 1, mean)
sd <- apply(oligo_richness, 1, sd)
measure <- rep("Richness", nsamp)
oligo_rich_stats <- data.frame(norep_filter_name, mean, sd, measure)

# Create a new dataframe to hold the means and standard deviations of evenness estimates
norep_filter_name <- row.names(oligo_evenness)
mean <- apply(oligo_evenness, 1, mean)
sd <- apply(oligo_evenness, 1, sd)
measure <- rep("Inverse_Simpson", nsamp)
oligo_even_stats <- data.frame(norep_filter_name, mean, sd, measure)

# Create a new dataframe to hold the means and standard deviations of Shannon Entropy
norep_filter_name <- row.names(oligo_shannon)
mean <- apply(oligo_shannon, 1, mean)
sd <- apply(oligo_shannon, 1, sd)
measure <- rep("Shannon_Entropy", nsamp)
oligo_shannon_stats <- data.frame(norep_filter_name, mean, sd, measure)

# Calculate Simpson's Evenness into new df called "simps_evenness"
oligo_simps_evenness <- inner_join(oligo_rich_stats, oligo_even_stats, by = "norep_filter_name") %>%
  mutate(mean = mean.y/mean.x,
         sd = sd(mean),
         measure = "Simpsons_Evenness") %>%
  select(norep_filter_name, mean, sd, measure)

# Combine alpha diversity into one dataframe 
oligo_alpha <- rbind(oligo_rich_stats, oligo_even_stats, oligo_simps_evenness, oligo_shannon_stats)
s <- data.frame(sample_data(oligo_merged_pruned))
oligo_alphadiv <- merge(oligo_alpha, s, by = "norep_filter_name") %>%
  filter(project == "Muskegon_Lake")


ggplot(filter(oligo_alphadiv, measure == "Richness"), aes(x = norep_filter_name, y = mean, color = lakesite)) +
  geom_point() + facet_grid(project~fraction, scales = "free") + 
  xlab("Sample Name") + ylab("Richness") +
  theme(axis.text.x = element_text(angle = 30))  #Set the x-axis labels)

ggplot(filter(oligo_alphadiv, measure == "Inverse_Simpson"), aes(x = norep_filter_name, y = mean, color = lakesite)) +
  geom_point() + facet_grid(project~fraction, scales = "free") + 
  xlab("Sample Name") + ylab("Inverse Simpson") +
  theme(axis.text.x = element_text(angle = 30))  #Set the x-axis labels)

ggplot(filter(oligo_alphadiv, measure == "Shannon_Entropy"), aes(x = norep_filter_name, y = mean, color = lakesite)) +
  geom_point() + facet_grid(project~fraction, scales = "free") + 
  xlab("Sample Name") + ylab("Shannon Entropy") +
  theme(axis.text.x = element_text(angle = 30))  #Set the x-axis labels)

######################################################### RICHNESS
#  Subset a dataframe with the key information
ML_oligo_rich_stats <- filter(oligo_alphadiv, 
                              measure == "Richness" & 
                                project == "Muskegon_Lake" & 
                                fraction %in% c("WholePart", "Free") & 
                                year == "2015")


# Can fractional production be predicted by richness? 
free_ML_oligo_rich_stats <- filter(ML_oligo_rich_stats, fraction == "Free")
freeprod_ML_oligo_rich <- lm(frac_bacprod ~ mean, data = free_ML_oligo_rich_stats)
freeprod_ML_oligo_rich
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_rich_stats)
## 
## Coefficients:
## (Intercept)         mean  
##    -30.1079       0.2411
summary(freeprod_ML_oligo_rich)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_rich_stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -22.101 -11.111  -0.630   8.417  36.302 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept) -30.1079    70.8409  -0.425    0.680
## mean          0.2411     0.3144   0.767    0.461
## 
## Residual standard error: 17.87 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.05554,    Adjusted R-squared:  -0.03891 
## F-statistic: 0.588 on 1 and 10 DF,  p-value: 0.4609
part_ML_abs_rich_stats <- filter(ML_oligo_rich_stats, fraction == "WholePart")
partprod_MLoligo_rich <- lm(frac_bacprod ~ mean, data = part_ML_abs_rich_stats)
partprod_MLoligo_rich
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_rich_stats)
## 
## Coefficients:
## (Intercept)         mean  
##    -33.0697       0.1797
summary(partprod_MLoligo_rich)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_rich_stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.1210 -4.8977  0.0914  2.5357 12.6906 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)  
## (Intercept) -33.06972   15.95877  -2.072   0.0650 .
## mean          0.17968    0.06609   2.719   0.0216 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.567 on 10 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.425,  Adjusted R-squared:  0.3675 
## F-statistic: 7.391 on 1 and 10 DF,  p-value: 0.02161
# Plot 
oligo_rich_vegan <- ggplot(ML_oligo_rich_stats, aes(x=mean, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), width = 0.2) + 
  ggtitle("Oligotyping: Vegan") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  #scale_x_continuous(expand = c(0,0)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Observed Richness (D0)") +
  #geom_smooth(data=subset(ML_oligo_rich_stats, fraction == "Free"), method='lm') + 
  geom_smooth(data=subset(ML_oligo_rich_stats, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.15,0.9),        
        legend.title=element_blank()) +
  annotate("text", x = 210, y=45, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(freeprod_ML_oligo_rich)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(freeprod_ML_oligo_rich)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 260, y=25, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(partprod_MLoligo_rich)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(partprod_MLoligo_rich)$coefficients[,4][2]), digits = 4))); 
## Warning: Ignoring unknown parameters: width
######################################################### SHANNON ENTROPY
#  Subset a dataframe with the key information
ML_oligo_shannon_stats <- filter(oligo_alphadiv, 
                              measure == "Shannon_Entropy" & 
                                project == "Muskegon_Lake" & 
                                fraction %in% c("WholePart", "Free") & 
                                year == "2015")


# Can fractional production be predicted by richness? 
free_ML_oligo_shannon_stats <- filter(ML_oligo_shannon_stats, fraction == "Free")
freeprod_ML_oligo_shannon <- lm(frac_bacprod ~ mean, data = free_ML_oligo_shannon_stats)
freeprod_ML_oligo_shannon
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_shannon_stats)
## 
## Coefficients:
## (Intercept)         mean  
##     -166.70        45.29
summary(freeprod_ML_oligo_shannon)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_shannon_stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -23.794  -6.324  -1.109   5.862  26.839 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  -166.70     115.57  -1.442     0.18
## mean           45.29      27.41   1.652     0.13
## 
## Residual standard error: 16.3 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.2144, Adjusted R-squared:  0.1359 
## F-statistic: 2.729 on 1 and 10 DF,  p-value: 0.1295
part_ML_abs_shannon_stats <- filter(ML_oligo_shannon_stats, fraction == "WholePart")
partprod_MLoligo_shannon <- lm(frac_bacprod ~ mean, data = part_ML_abs_shannon_stats)
partprod_MLoligo_shannon
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_shannon_stats)
## 
## Coefficients:
## (Intercept)         mean  
##      -43.92        12.97
summary(partprod_MLoligo_shannon)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_shannon_stats)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -10.9675  -2.5479  -0.8387   1.3585  11.6538 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)  -43.915     16.918  -2.596  0.02669 * 
## mean          12.970      4.047   3.205  0.00942 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.083 on 10 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.5067, Adjusted R-squared:  0.4573 
## F-statistic: 10.27 on 1 and 10 DF,  p-value: 0.009417
# Plot 
oligo_shannon_vegan <- ggplot(ML_oligo_shannon_stats, aes(x=mean, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), width = 0.2) + 
  ggtitle("Oligotyping: Vegan") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  #scale_x_continuous(expand = c(0,0)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Shannon Entropy (D1)") +
  #geom_smooth(data=subset(ML_oligo_shannon_stats, fraction == "Free"), method='lm') + 
  geom_smooth(data=subset(ML_oligo_shannon_stats, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.15,0.9),        
        legend.title=element_blank()) +
  annotate("text", x = 3.9, y=35, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(freeprod_ML_oligo_shannon)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(freeprod_ML_oligo_shannon)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 4.7, y=5, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(partprod_MLoligo_shannon)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(partprod_MLoligo_shannon)$coefficients[,4][2]), digits = 4))); 
## Warning: Ignoring unknown parameters: width
######################################################### INVERSE SIMPSON
#  Subset a dataframe with the key information
ML_oligo_invsimps_stats <- filter(oligo_alphadiv, 
                                   measure == "Inverse_Simpson" & 
                                     project == "Muskegon_Lake" & 
                                     fraction %in% c("WholePart", "Free") & 
                                     year == "2015")

# Can fractional production be predicted by invsimpsness? 
free_ML_oligo_invsimps_stats <- filter(ML_oligo_invsimps_stats, fraction == "Free")
freeprod_ML_oligo_invsimps <- lm(frac_bacprod ~ mean, data = free_ML_oligo_invsimps_stats)
freeprod_ML_oligo_invsimps
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_invsimps_stats)
## 
## Coefficients:
## (Intercept)         mean  
##     -7.4198       0.9455
summary(freeprod_ML_oligo_invsimps)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_invsimps_stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -20.025  -7.628  -1.606   3.779  30.021 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  -7.4198    15.6884  -0.473   0.6464  
## mean          0.9455     0.4519   2.092   0.0629 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 15.34 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.3045, Adjusted R-squared:  0.2349 
## F-statistic: 4.377 on 1 and 10 DF,  p-value: 0.06289
part_ML_abs_invsimps_stats <- filter(ML_oligo_invsimps_stats, fraction == "WholePart")
partprod_MLoligo_invsimps <- lm(frac_bacprod ~ mean, data = part_ML_abs_invsimps_stats)
partprod_MLoligo_invsimps
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_invsimps_stats)
## 
## Coefficients:
## (Intercept)         mean  
##     -2.1739       0.3796
summary(partprod_MLoligo_invsimps)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_invsimps_stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.8628 -2.5078 -0.7671  2.1398  7.7541 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) -2.17388    2.80930  -0.774 0.456943    
## mean         0.37965    0.07681   4.942 0.000585 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.668 on 10 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.7095, Adjusted R-squared:  0.6805 
## F-statistic: 24.43 on 1 and 10 DF,  p-value: 0.000585
# Plot Simpson's Evenness
oligo_invsimps_vegan <- ggplot(ML_oligo_invsimps_stats, aes(x=mean, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + geom_errorbarh(aes(xmin = mean - sd, xmax = mean + sd), width = 0.2) + 
  ggtitle("Oligotyping: Vegan") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  #scale_x_continuous(expand = c(0,0)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Inverse Simpson Index") +
  geom_smooth(data=subset(ML_oligo_invsimps_stats, fraction == "Free"), method='lm') + 
  geom_smooth(data=subset(ML_oligo_invsimps_stats, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.15,0.9),        
        legend.title=element_blank()) +
  annotate("text", x = 58, y=35, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(freeprod_ML_oligo_invsimps)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(freeprod_ML_oligo_invsimps)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 63, y=5, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(partprod_MLoligo_invsimps)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(partprod_MLoligo_invsimps)$coefficients[,4][2]), digits = 4))); 
## Warning: Ignoring unknown parameters: width
######################################################### SIMPSON'S EVENNESS
#  Subset a dataframe with the key information
ML_oligo_simpseven_stats <- filter(oligo_alphadiv, 
                                   measure == "Simpsons_Evenness" & 
                                     project == "Muskegon_Lake" & 
                                     fraction %in% c("WholePart", "Free") & 
                                     year == "2015")

# Can fractional production be predicted by simpsevenness? 
free_ML_oligo_simpseven_stats <- filter(ML_oligo_simpseven_stats, fraction == "Free")
freeprod_ML_oligo_simpseven <- lm(frac_bacprod ~ mean, data = free_ML_oligo_simpseven_stats)
freeprod_ML_oligo_simpseven
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_simpseven_stats)
## 
## Coefficients:
## (Intercept)         mean  
##      -1.932      174.827
summary(freeprod_ML_oligo_simpseven)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = free_ML_oligo_simpseven_stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -18.188  -9.173  -3.443   5.543  28.901 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)   -1.932     16.112  -0.120    0.907
## mean         174.827    103.655   1.687    0.123
## 
## Residual standard error: 16.22 on 10 degrees of freedom
##   (12 observations deleted due to missingness)
## Multiple R-squared:  0.2215, Adjusted R-squared:  0.1436 
## F-statistic: 2.845 on 1 and 10 DF,  p-value: 0.1226
part_ML_abs_simpseven_stats <- filter(ML_oligo_simpseven_stats, fraction == "WholePart")
partprod_MLoligo_simpseven <- lm(frac_bacprod ~ mean, data = part_ML_abs_simpseven_stats)
partprod_MLoligo_simpseven
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_simpseven_stats)
## 
## Coefficients:
## (Intercept)         mean  
##      -4.501      112.772
summary(partprod_MLoligo_simpseven)
## 
## Call:
## lm(formula = frac_bacprod ~ mean, data = part_ML_abs_simpseven_stats)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -7.8889 -3.4521 -0.8976  2.5784  7.5087 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   -4.501      3.517  -1.280  0.22945   
## mean         112.772     24.956   4.519  0.00111 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 4.966 on 10 degrees of freedom
##   (9 observations deleted due to missingness)
## Multiple R-squared:  0.6713, Adjusted R-squared:  0.6384 
## F-statistic: 20.42 on 1 and 10 DF,  p-value: 0.00111
# Plot Simpson's Evenness
oligo_simpseven_vegan <- ggplot(ML_oligo_simpseven_stats, aes(x=mean, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + 
  ggtitle("Oligotyping: Vegan") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  #scale_x_continuous(expand = c(0,0)) + 
  scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Simpson's Evenness") +
  #geom_smooth(data=subset(ML_oligo_simpseven_stats, fraction == "Free"), method='lm') + 
  geom_smooth(data=subset(ML_oligo_simpseven_stats, fraction == "WholePart"), method='lm') + 
  theme(legend.position=c(0.15,0.9),        
        legend.title=element_blank()) +
  annotate("text", x = 0.105, y=25, color = "cornflowerblue", fontface = "bold",
           label = paste("R2 =", round(summary(freeprod_ML_oligo_simpseven)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(freeprod_ML_oligo_simpseven)$coefficients[,4][2]), digits = 4))) + 
  annotate("text", x = 0.20, y=5, color = "firebrick3", fontface = "bold",
           label = paste("R2 =", round(summary(partprod_MLoligo_simpseven)$adj.r.squared, digits = 4), "\n", 
                         "p =", round(unname(summary(partprod_MLoligo_simpseven)$coefficients[,4][2]), digits = 4))); 

oligotyping_vegan <- plot_grid(oligo_rich_vegan, oligo_simpseven_vegan,  oligo_shannon_vegan, oligo_invsimps_vegan,
          labels = c("A", "B", "C", "D"), 
          align = "h", nrow = 2, ncol = 2)
## Warning: Removed 9 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).
## Warning: Removed 21 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 9 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 9 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).
## Warning: Removed 21 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 9 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).
## Warning: Removed 21 rows containing missing values (geom_errorbarh).
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <ce>
## Warning in grid.Call(L_stringMetric, as.graphicsAnnot(x$label)): conversion
## failure on 'Production (μgC/L/hr)' in 'mbcsToSbcs': dot substituted for
## <bc>
oligotyping_vegan

#ggsave("Figures/vegan_oligo_alpha_vs_prod.png", oligotyping_vegan, dpi = 600, units = "in", width = 10, height = 8)

Stacked Bar plots for taxonomic structure

OTU Phylum Level Analysis

musk_scaled_otu_merged_pruned <- subset_samples(scaled_otu_merged_pruned, project == "Muskegon_Lake" & 
                                           year == "2015" &
                                           limnion == "Top" &
                                           fraction %in% c("WholePart", "Free"))

# Fix month levels in sample_data
sample_data(musk_scaled_otu_merged_pruned)$fraction <- factor(
  sample_data(musk_scaled_otu_merged_pruned)$fraction, 
  levels = c("WholePart", "Free")
)

sample_data(musk_scaled_otu_merged_pruned)$lakesite <- factor(
  sample_data(musk_scaled_otu_merged_pruned)$lakesite, 
  levels = c("MOT", "MDP", "MBR", "MIN")
)



musk_scaled_otu_merged_pruned_phylum <- musk_scaled_otu_merged_pruned %>%
  tax_glom(taxrank = "Phylum") %>%                     # agglomerate at phylum level
  transform_sample_counts(function(x) {x/sum(x)} ) %>% # Transform to rel. abundance
  psmelt() %>%                                         # Melt to long format
  filter(Abundance > 0.02) %>%                         # Filter out low abundance taxa
  arrange(Phylum)                                      # Sort data frame alphabetically by phylum


# Set colors for plotting
phylum_colors <- c(
  "#CBD588", "#5F7FC7", "orange","#DA5724", "#508578", "plum1",
   "firebrick4", "white", "#D14285","seagreen3", "gold", 
  "#8569D5", "firebrick1", "peachpuff"
)


# Plot 
ggplot(musk_scaled_otu_merged_pruned_phylum, aes(x = season, y = Abundance, fill = Phylum)) + 
  facet_grid(fraction~lakesite) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_manual(values = phylum_colors) +
  guides(fill = guide_legend(reverse = FALSE, keywidth = 1, keyheight = 1)) +
  ylab("Relative Abundance (Phyla > 2%) \n") +
  ggtitle("Phylum Composition of 2015 Muskegon Lake \n Bacterial Communities by Sampling Site") +
  theme(strip.background = element_rect(fill = "white"),
        axis.text.x = element_text(colour = "black", angle=45, hjust = 1, vjust = 1))

Oligotyping Phylum level analysis

musk_scaled_oligo_merged_pruned <- subset_samples(scaled_oligo_merged_pruned, project == "Muskegon_Lake" & 
                                           year == "2015" &
                                           limnion == "Top" &
                                           fraction %in% c("WholePart", "Free"))

# Fix month levels in sample_data
sample_data(musk_scaled_oligo_merged_pruned)$fraction <- factor(
  sample_data(musk_scaled_oligo_merged_pruned)$fraction, 
  levels = c("WholePart", "Free")
)

sample_data(musk_scaled_oligo_merged_pruned)$lakesite <- factor(
  sample_data(musk_scaled_oligo_merged_pruned)$lakesite, 
  levels = c("MOT", "MDP", "MBR", "MIN")
)



musk_scaled_oligo_merged_pruned_phylum <- musk_scaled_oligo_merged_pruned %>%
  tax_glom(taxrank = "Phylum") %>%                     # agglomerate at phylum level
  transform_sample_counts(function(x) {x/sum(x)} ) %>% # Transform to rel. abundance
  psmelt() %>%                                         # Melt to long format
  filter(Abundance > 0.02) %>%                         # Filter out low abundance taxa
  arrange(Phylum)                                      # Sort data frame alphabetically by phylum


# Set colors for plotting
phylum_colors <- c(
  "#CBD588", "#5F7FC7", "orange","#DA5724", "#508578", "plum1",
   "firebrick4", "white", "#D14285","seagreen3", "gold", 
  "#8569D5", "firebrick1", "peachpuff"
)


# Plot 
ggplot(musk_scaled_oligo_merged_pruned_phylum, aes(x = season, y = Abundance, fill = Phylum)) + 
  facet_grid(fraction~lakesite) +
  geom_bar(stat = "identity", color = "black") +
  scale_fill_manual(values = phylum_colors) +
  guides(fill = guide_legend(reverse = FALSE, keywidth = 1, keyheight = 1)) +
  ylab("Relative Abundance (Phyla > 2%) \n") +
  ggtitle("Oligotyping Phylum Composition of 2015 Muskegon Lake \n Bacterial Communities by Sampling Site") +
  theme(strip.background = element_rect(fill = "white"),
        axis.text.x = element_text(colour = "black", angle=45, hjust = 1, vjust = 1))

Phylum Diversity plots

# ######################################################### RICHNESS
# #  Subset a dataframe with the key information
# ML_oligo_rich_stats <- filter(oligo_alphadiv, 
#                               measure == "Richness" & 
#                                 project == "Muskegon_Lake" & 
#                                 fraction %in% c("WholePart", "Free") & 
#                                 year == "2015")
# 
# 
# # Can fractional production be predicted by richness? 
# free_ML_oligo_rich_stats <- filter(ML_oligo_rich_stats, fraction == "Free")
# freeprod_ML_oligo_rich <- lm(frac_bacprod ~ mean, data = free_ML_oligo_rich_stats)
# freeprod_ML_oligo_rich
# summary(freeprod_ML_oligo_rich)
# 
# part_ML_abs_rich_stats <- filter(ML_oligo_rich_stats, fraction == "WholePart")
# partprod_MLoligo_rich <- lm(frac_bacprod ~ mean, data = part_ML_abs_rich_stats)
# partprod_MLoligo_rich
# summary(partprod_MLoligo_rich)

# Plot 
ggplot(filter(alpha_comb_final, Taxa == "Cyanobacteria"), 
       aes(x=Estimate, y=frac_bacprod, color = fraction)) + 
  geom_point(size = 3.5) + 
  ggtitle("OTU: Cyanobacteria") +
  scale_color_manual(values = c("firebrick3","cornflowerblue"), limits = c("WholePart", "Free")) +
  facet_wrap( ~ Alphadiv, ncol=2, scales="free_x") +
  #scale_x_continuous(expand = c(0,0)) + 
  #scale_y_continuous(limits = c(0,70),expand = c(0,0)) + 
  ylab("Production (μgC/L/hr)") + xlab("Observed Richness (D0)") +
  #geom_smooth(data=subset(ML_oligo_rich_stats, fraction == "Free"), method='lm') + 
  #geom_smooth(data=subset(ML_oligo_rich_stats, fraction == "WholePart"), method='lm') + 
  theme(legend.position="right",        
        legend.title=element_blank())
## Error in filter_(.data, .dots = lazyeval::lazy_dots(...)): object 'alpha_comb_final' not found
 # annotate("text", x = 210, y=45, color = "cornflowerblue", fontface = "bold",
#           label = paste("R2 =", round(summary(freeprod_ML_oligo_rich)$adj.r.squared, digits = 4), "\n", 
#                         "p =", round(unname(summary(freeprod_ML_oligo_rich)$coefficients[,4][2]), digits = 4))) + 
#  annotate("text", x = 260, y=25, color = "firebrick3", fontface = "bold",
#           label = paste("R2 =", round(summary(partprod_MLoligo_rich)$adj.r.squared, digits = 4), "\n", 
#                         "p =", round(unname(summary(partprod_MLoligo_rich)$coefficients[,4][2]), digits = 4))); oligo_rich_vegan

Production

prod_data <- data.frame(sample_data(musk_scaled_otu_merged_pruned))


ggplot(prod_data, aes(x = season, y = tot_bacprod, color = lakesite, group = lakesite, linetype = lakesite)) + 
  geom_point(size = 3.5) +   geom_line(size = 2) +
  ylab("Total Bacterial\n Production") +
  theme(axis.title.x = element_blank(),
        axis.text.x = element_text(colour = "black", angle=15, hjust = 1, vjust = 1))

#### Plot many environ variables
tot_prod_plot <- ggplot(prod_data, aes(x = season, y = tot_bacprod, color = lakesite, group = lakesite)) + 
  facet_grid(.~lakesite) +
  geom_point(size = 3.5) + geom_line() +
  ylab("Total Bacterial\n Production") +
  theme(strip.background = element_rect(fill = "white"),
        axis.title.x = element_blank(),
        axis.text.x = element_text(colour = "black", angle=15, hjust = 1, vjust = 1),
        legend.position="none")

chla_plot <- ggplot(prod_data, aes(x = season, y = Chl_Lab_ugperL, color = lakesite, group = lakesite)) + 
  facet_grid(.~lakesite) +
  geom_point(size = 3.5) + geom_line() +
  ylab("Chlorophyll a") +
  theme(strip.background = element_rect(fill = "white"),
        axis.title.x = element_blank(),
        axis.text.x = element_text(colour = "black", angle=15, hjust = 1, vjust = 1),
        legend.position="none")

ph_plot <- ggplot(prod_data, aes(x = season, y = pH, color = lakesite, group = lakesite)) + 
  facet_grid(.~lakesite) +
  geom_point(size = 3.5) + geom_line() +
  ylab("pH") +
  theme(strip.background = element_rect(fill = "white"),
        axis.title.x = element_blank(),
        axis.text.x = element_text(colour = "black", angle=15, hjust = 1, vjust = 1),
        legend.position="none")

tp_plot <- ggplot(prod_data, aes(x = season, y = TP_ugperL, color = lakesite, group = lakesite)) + 
  facet_grid(.~lakesite) +
  geom_point(size = 3.5) + geom_line() +
  ylab("TP_ugperL") +
  theme(strip.background = element_rect(fill = "white"),
        axis.title.x = element_blank(),
        axis.text.x = element_text(colour = "black", angle=15, hjust = 1, vjust = 1),
        legend.position="none")

no3_plot <- ggplot(prod_data, aes(x = season, y = NO3_mgperL, color = lakesite, group = lakesite)) + 
  facet_grid(.~lakesite) +
  geom_point(size = 3.5) + geom_line() +
  ylab("NO3_mgperL") +
  theme(strip.background = element_rect(fill = "white"),
        axis.title.x = element_blank(),
        axis.text.x = element_text(colour = "black", angle=15, hjust = 1, vjust = 1),
        legend.position="none")


nh3_plot <- ggplot(prod_data, aes(x = season, y = NH3_mgperL, color = lakesite, group = lakesite)) + 
  facet_grid(.~lakesite) +
  geom_point(size = 3.5) + geom_line() +
  ylab("NH3_mgperL") +
  theme(strip.background = element_rect(fill = "white"),
        axis.title.x = element_blank(),
        axis.text.x = element_text(colour = "black", angle=15, hjust = 1, vjust = 1),
        legend.position="none")

environ_seasons <- plot_grid(tot_prod_plot, chla_plot, ph_plot, tp_plot, no3_plot, nh3_plot,
          labels = c("A", "B", "C", "D", "E", "F"), 
          align = "h", nrow = 6, ncol = 1)

environ_seasons

Phylum production analysis

# Michelle Berry's subsetting function, similar to phyloseq::subset_taxa, except taxa can 
# be passed as arguments within functions without weird environment errors
#
# Args:
#   physeq: a phyloseq object
#   taxrank: taxonomic rank to filter on
#   taxa: a vector of taxa groups to filter on
#
# Returns: 
#   a phyloseq object subsetted to the x taxa in taxrank
my_subset_taxa <- function(physeq, taxrank, taxa) {
  physeq_tax_sub <- tax_table(physeq)[tax_table(physeq)[ , taxrank] %in% taxa, ]
  tax_table(physeq) <- physeq_tax_sub
  return(physeq)
}





# Initialize parameters
trials <- 100
min_lib <- min(sample_sums(musk_scaled_otu_merged_pruned)) # Depth we are rarefying to

# Groups to estimate alpha diversity for 
mytaxa <- c("Acidobacteria", "Actinobacteria", "Alphaproteobacteria", "Armatimonadetes",
            "Bacteroidetes", "Betaproteobacteria", "Chlorobi", "Chloroflexi",
            "Cyanobacteria","Deltaproteobacteria", "Gammaproteobacteria", 
            "Planctomycetes","Verrucomicrobia")
names(mytaxa) <- mytaxa

# Taxonomic ranks of mytaxa
mytaxa_taxrank <- c("Phylum", "Phylum", "Phylum", "Phylum", "Phylum", "Phylum", 
                      "Phylum", "Phylum", "Phylum", "Phylum", "Phylum", "Phylum", "Phylum")
names(mytaxa_taxrank) <- mytaxa

# Data frame to hold alpha diversity estimates over trials
alphadiv_df <- data.frame(matrix(nrow = nsamples(musk_scaled_otu_merged_pruned), ncol = trials))

# Initialize empty df's for richness and evenness of all taxa in mytaxa
richness <- lapply(mytaxa, function(x) {return(alphadiv_df)} )
shannon <- lapply(mytaxa, function(x) {return(alphadiv_df)} )
invsimps <- lapply(mytaxa, function(x) {return(alphadiv_df)} )
simpseven <- lapply(mytaxa, function(x) {return(alphadiv_df)} )

alphadiv_list <- list(richness = richness, shannon = shannon, invsimps = invsimps, simpseven = simpseven)


# Set the seed for reproducibility
set.seed(3)

# Run trials to subsample and estimate diversity
for (i in 1:100) {
  
  # Subsample
  rarefied_physeq <- rarefy_even_depth(musk_scaled_otu_merged_pruned, sample.size = min_lib, verbose = FALSE, replace = TRUE)
  
  # Generate alpha-diversity estimates for each taxonomic group
  for (t in mytaxa) {

    # Subset the taxa
    physeq_sub <- my_subset_taxa(physeq = rarefied_physeq, 
                                 taxrank = mytaxa_taxrank[t], 
                                 taxa = t)
    
    # Calculate observed richness for that group and store value in a df column
    richness <- estimate_richness(physeq_sub, measures = "Observed")[ ,1]
    alphadiv_list$richness[[t]][ ,i] <- richness
    
    # Calculate observed shannon for that group and store value in a df column
    shannon <- estimate_richness(physeq_sub, measures = "Shannon")[ ,1]
    alphadiv_list$shannon[[t]][ ,i] <- shannon
    
    # Calculate observed invsimps for that group and store value in a df column
    invsimps <- estimate_richness(physeq_sub, measures = "InvSimpson")[ ,1]
    alphadiv_list$invsimps[[t]][ ,i] <- invsimps
     
    # Calculate Simpson's Evenness for that group and store value in a df column
    alphadiv_list$simpseven[[t]][ ,i] <- (estimate_richness(physeq_sub, measures = "InvSimpson")[ ,1]/richness)

  }
}

# Calculate the means of richness and inverse simpson from the 100 trials
alphadiv_est <- lapply(alphadiv_list, function(div_measure) {
    lapply(div_measure, function(taxa_group) {
        alpha_mean <- rowMeans(taxa_group)
        return(alpha_mean)
    })  
})

# Convert alphadiv_est richness and simpson's E lists into wide data frames
l <- lapply(alphadiv_est, function(x) {
  # convert from list to data.frame
  est_df <- plyr::ldply(.data = x, .fun = data.frame)
  names(est_df) <- c("Taxa", "Diversity")
  
  # Add in SampleID column and spread to wide format
  r <- est_df %>%
    mutate(norep_filter_name = rep(sample_names(musk_scaled_otu_merged_pruned), length(mytaxa)))
  return(r)
})

# Merge sample metadata with these estimates
merge_dat <- data.frame(sample_data(musk_scaled_otu_merged_pruned)) %>%
  select(norep_filter_name, Chl_Lab_ugperL, TP_ugperL, pH, frac_bacprod, NH3_mgperL, NO3_mgperL,
         lakesite, fraction, season) #%>%
  #mutate(logChla = log(Chla)) %>%
  #mutate(logPhyco = log(Phycocyanin + 0.1)) %>%
  #mutate(logTP = log(TP)) %>%
  #mutate(logTurb = log(Turbidity))

# Create a df with a "Diversity" column that includes richness and inv. simpson,
# and log-chl a values from erie sample_data
alpha_comb1 <- l$richness %>% 
  left_join(y = l$shannon, by = c("Taxa", "norep_filter_name")) %>%   # Join the richness and inv_simp df's
  rename(Richness = Diversity.x, Shannon = Diversity.y) 

alpha_comb2 <- l$invsimps %>%   
  left_join(y = l$simpseven, by = c("Taxa", "norep_filter_name")) %>%   # Join the richness and inv_simp df's
  rename(Inverse_Simpson = Diversity.x, Simpsons_Evenness = Diversity.y) 

alpha_comb_final <- alpha_comb1 %>%
  left_join(y = alpha_comb2, by = c("Taxa", "norep_filter_name")) %>%
  left_join(merge_dat, by = "norep_filter_name") %>%                  # Join with merged nutrient data
  gather(key = "Alphadiv", value = "Estimate", Richness, Shannon, Inverse_Simpson, Simpsons_Evenness)